{
 "metadata": {
  "name": "",
  "signature": "sha256:df8c647192a450cb4d19e3b434665f7dfc780332f17b29a8a3b2b9ab23ec1aa6"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Load the firms table with its first column as the row index.\n",
      "# DataFrame.from_csv is no longer available in current pandas releases;\n",
      "# read_csv with index_col=0 is the documented replacement.\n",
      "sp500_companies = pd.read_csv(\"../twitter/firms.csv\", index_col=0)\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Expose the index labels as a regular column.\n",
      "sp500_companies[\"Symbols\"] = sp500_companies.index\n",
      "# Search string per row: \"<symbol>,<keywords>\".\n",
      "sp500_companies[\"Search\"] = sp500_companies[\"Symbols\"] + \",\" + sp500_companies[\"Keywords\"]\n",
      "print(sp500_companies[\"Search\"])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Symbol\n",
        "MMM                           MMM,3M co\n",
        "ABT             ABT,Abbott Laboratories\n",
        "ABBV                    ABBV,AbbVie inc\n",
        "ACN                       ACN,Accenture\n",
        "ACE                     ACE,ACE Limited\n",
        "ACT                     ACT,Actavis inc\n",
        "ADBE                     ADBE,Adobe inc\n",
        "AES                        AES,Aes Corp\n",
        "AET                       AET,Aetna inc\n",
        "AFL                       AFL,Aflac inc\n",
        "A                         A,Agilent inc\n",
        "GAS                         GAS,AGL inc\n",
        "APD       APD,air producs chemicals inc\n",
        "ARG                      ARG,airgas inc\n",
        "AKAM                    AKAM,akamai inc\n",
        "...\n",
        "WFM             WFM,whole foods inc\n",
        "WIN             WIN,windstream corp\n",
        "WEC       WEC,wisconsin energy corp\n",
        "WYN                WYN,wyndham corp\n",
        "WYNN          WYNN,wynn resorst ltd\n",
        "XEL             XEL,xcel energy inc\n",
        "XRX                  XRX,xerox corp\n",
        "XLNX                XLNX,xilinx inc\n",
        "XL                  XL,xl group plc\n",
        "XYL                   XYL,xylem inc\n",
        "YHOO                 YHOO,yahoo inc\n",
        "YUM                     YUM,yum inc\n",
        "ZMH         ZMH,zimmer holdings inc\n",
        "ZION             ZION,zions bancorp\n",
        "ZTS                  ZTS,zoetis inc\n",
        "Name: Search, Length: 501, dtype: object\n"
       ]
      }
     ],
     "prompt_number": 159
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Show which columns the companies frame currently has.\n",
      "print(sp500_companies.columns)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Index(['Name', 'Sector', 'Keywords', 'Symbols'], dtype='object')\n"
       ]
      }
     ],
     "prompt_number": 45
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Sample text that starts with a ticker symbol.\n",
      "tarek = \"MMM this is a book\""
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 68
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import re"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Index labels and the Name column gathered side by side.\n",
      "pair = {\n",
      "    'stocks': sp500_companies.index,\n",
      "    'name': sp500_companies['Name'],\n",
      "}\n",
      "# Lookup table: index label -> Name value.\n",
      "pp = dict(zip(sp500_companies.index, sp500_companies['Name']))\n",
      "print(pp['ECL'])\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Ecolab Inc.\n"
       ]
      }
     ],
     "prompt_number": 26
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 48
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Replace every word that is a key of pp with its mapped value.\n",
      "# The replacement has to be a callable: indexing pp with a backreference string\n",
      "# happens before re.sub runs and raises KeyError. Words that are not keys of pp\n",
      "# are left unchanged.\n",
      "print(re.sub(r'(\\w+)', lambda m: pp.get(m.group(1), m.group(1)), tarek))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "KeyError",
       "evalue": "'\\\\g<1>'",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-54-d828a3d9fd24>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mre\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'(\\w+)'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpp\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'\\g<1>'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtarek\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[1;31mKeyError\u001b[0m: '\\\\g<1>'"
       ]
      }
     ],
     "prompt_number": 54
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Sector value(s) for the row(s) whose index label is 'MMM'.\n",
      "print(sp500_companies.loc[sp500_companies.index == 'MMM', 'Sector'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Symbol\n",
        "MMM       Industrials\n",
        "Name: Sector, dtype: object\n"
       ]
      }
     ],
     "prompt_number": 90
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def replace_stock_symbol_by_name(stocksymbols):\n",
      "    \"\"\"Return the company names for the given ticker symbols, joined by commas.\n",
      "\n",
      "    Each entry may carry a leading '$' (cashtag form); it is stripped before the\n",
      "    lookup against the index of the global ``sp500_companies`` frame. Entries\n",
      "    with no matching index label are passed through unchanged.\n",
      "\n",
      "    Parameters\n",
      "    ----------\n",
      "    stocksymbols : iterable of str\n",
      "        Ticker symbols, with or without a leading '$'.\n",
      "\n",
      "    Returns\n",
      "    -------\n",
      "    str\n",
      "        The looked-up names (or the original entries) joined with ','.\n",
      "    \"\"\"\n",
      "    names = sp500_companies[\"Name\"]\n",
      "    # Series.get returns one scalar per label, so str.join receives strings\n",
      "    # rather than the Series objects a boolean-mask selection would produce.\n",
      "    return \",\".join(\n",
      "        str(names.get(stock.lstrip(\"$\"), stock)) for stock in stocksymbols\n",
      "    )\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 94
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Call the helper with a single cashtag-style symbol.\n",
      "replace_stock_symbol_by_name([\"$MMM\"])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "TypeError",
       "evalue": "sequence item 0: expected str instance, Series found",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-95-b91e70e78608>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mreplace_stock_symbol_by_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"$MMM\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[1;32m<ipython-input-94-66ce7d33bc9a>\u001b[0m in \u001b[0;36mreplace_stock_symbol_by_name\u001b[1;34m(stocksymbols)\u001b[0m\n\u001b[0;32m      2\u001b[0m     \u001b[1;32mglobal\u001b[0m \u001b[0msp500_companies\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[1;32mreturn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\",\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msp500_companies\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mName\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0msp500_companies\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mstock\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mstock\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mstocksymbols\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[1;31mTypeError\u001b[0m: sequence item 0: expected str instance, Series found"
       ]
      }
     ],
     "prompt_number": 95
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import re\n",
      "\n",
      "tweet = \"$MMM $A $VZ $tarek\"\n",
      "\n",
      "# A cashtag is a dollar sign followed by word characters. The raw string keeps\n",
      "# the backslashes for the regex engine instead of relying on invalid string escapes.\n",
      "cashtags = re.findall(r\"\\$\\w+\", tweet)\n",
      "print(cashtags)\n",
      "\n",
      "# Drop the leading '$' so the tokens can be used as index labels.\n",
      "symbols = [tag.lstrip('$') for tag in cashtags]\n",
      "print(symbols)\n",
      "\n",
      "# Look each symbol up in the Name column; unknown symbols print the \"yo\" placeholder.\n",
      "for symbol in symbols:\n",
      "    print(sp500_companies.Name.get(symbol, \"yo\"))\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "['$MMM', '$A', '$VZ', '$tarek']\n",
        "['MMM', 'A', 'VZ', 'tarek']\n",
        "3M Co.\n",
        "Agilent Technologies Inc\n",
        "Verizon Communications\n",
        "yo\n"
       ]
      }
     ],
     "prompt_number": 14
    }
   ],
   "metadata": {}
  }
 ]
}